# Build an in-memory lookup from the second space-separated field of each
# line of the ClueWeb09 spam "Fusion" file to the first field, then print the
# entry for one sample document id.
#
# NOTE(review): the original debug labels suggest the first field is a "ham
# probability" score and the second a web page id -- confirm against the
# dataset documentation.
# NOTE(review): the banner below talks about splitting a file, but the code
# visible here only builds a dictionary -- confirm the message is intended.
webPagesNumberDict = {}
print("This script is going to split a big file into small ones")

# `with` guarantees the handle is closed, even if a line fails to parse.
with open("/data1/team/weijiang/dataset/clueweb2009-related/clueweb09spam.Fusion") as inputFileHandler:
    # Iterate the file object directly so lines are streamed one at a time
    # instead of materializing the whole file in memory via readlines().
    for eachLine in inputFileHandler:
        # Skip blank lines (e.g. a trailing newline at end of file); they
        # have no second field and would raise IndexError below.
        if not eachLine.strip():
            continue
        elements = eachLine.split(" ")
        # Key: second field; value: first field. Both are kept as strings.
        webPagesNumberDict[elements[1].strip()] = elements[0].strip()

# Sample lookup; raises KeyError if this id is absent from the file.
print(webPagesNumberDict["clueweb09-en0000-00-00017"])
